################################################################
# Replication of Lehrer/Juhl/Gschwend                          #
# "The Wisdom of Crowds Design for Sensitive Survey Questions" #
# Created by Roni Lehrer                                       #
# Last changes made on: 4 October 2018                         #
#                                                              #
# see end of script for information on R and packages versions #
################################################################

rm(list=ls())        
library(data.table)  
library(ggplot2)     
library(boot)        
library(readstata13) 

#################################################################################################
# The dataset for this replication script is Wave 30 of the German Internet Panel.              #
# It is stored at the Gesis Data Archive as:                                                    #
#                                                                                               #
# Blom, Annelies G.; Felderer, Barbara; Herzing, Jessica; Krieger, Ulrich; Rettig, Tobias;      #
# SFB 884 "Political Economy of Reforms", Universität Mannheim (2018): German Internet Panel,   #
# Welle 30 (Juli 2017). GESIS Datenarchiv, Köln. ZA6904 Datenfile Version 2.0.0,                #
# doi:10.4232/1.13153                                                                           #
#                                                                                               #
# Internet: http://dx.doi.org/10.4232/1.13153                                                   #
# Gesis Study Number : ZA6904                                                                   #
#################################################################################################

# Load wave 30 of the German Internet Panel; factors and dates keep their raw coding
gip <- as.data.table(read.dta13("ZA6904_v2-0-0.dta", convert.factors=FALSE, convert.dates=FALSE))


# Variable selection: new short name = original GIP variable
keep.vars <- c(id="id_g", group="expCL30001", DQ="AA30039", RRT="CL30001",
               ICT1ctrl="CL30002", ICT1trt="CL30003", ICT2trt="CL30004", ICT2ctrl="CL30005",
               crowd="CL30012",
               gender="gender_16", year_of_birth_cat_16="year_of_birth_cat_16",
               educ_school_16="educ_school_16", online_status="online_status",
               state_16="state_16", dDatum="dDatum", educ_job_16="educ_job_16",
               dDQ="dauer_f016", dRRT="dauer_f050", dICT1ctrl="dauer_f051",
               dICT1trt="dauer_f052", dICT2trt="dauer_f053", dICT2ctrl="dauer_f054",
               dcrowd="dauer_f055")
data <- gip[, unname(keep.vars), with=FALSE]
setnames(data, names(keep.vars))

# Derived 0/1 indicators (NA where the source variable is missing)
data[, east := as.numeric(state_16>10)]         #dummy for East Germany and Berlin
data[, internet := as.numeric(online_status>0)] #dummy for internet access
data[, afd := as.numeric(DQ==11)]               #dummy for AfD vote intention
data[, c("online_status", "state_16") := NULL]  #remove old variables
# Reference to the prepared table; the later sections start again from it
data.full <- data

#################################################
# Sub groups results wisdom of crowds, Figure 1 #
#################################################

# Container for Figure 1: one row per subgroup/estimator, columns hold the
# lower CI bound, the point estimate and the upper CI bound
sub.result <- matrix(NA, nrow=6, ncol=3,
                     dimnames=list(c("all","afd","east", "west", "high knowledge", "median"),
                                   c("lowerCI","Estimate","upperCI")))


# Standard error of the mean of x, computed on the non-missing values only
se.mean <- function(x){
    observed <- na.omit(x)
    n.obs <- length(observed)
    sd(observed)/sqrt(n.obs)
}
# Recode the missing-value code -90 of the crowd estimate to NA.
# NOTE: keep the `$<-` assignment here (no `:=`): `data.full` was assigned from
# this table and the later sections still recode/count the raw -90 codes in it.
data$crowd <- ifelse(data$crowd==-90, NA, data$crowd) #remove missing cases

# Point estimate with a normal-approximation 95% confidence interval.
#   x        numeric vector of crowd estimates (NAs are ignored)
#   center   function giving the point estimate; it is called with na.rm=TRUE
#   se.scale multiplier applied to the standard error of the mean
# Returns c(lower, estimate, upper), i.e. the column order of sub.result.
crowd.ci <- function(x, center=mean, se.scale=1){
    est <- center(x, na.rm=TRUE)
    half.width <- 1.96*se.scale*se.mean(x)
    c(est - half.width, est, est + half.width)
}

#all cases
sub.result["all",] <- crowd.ci(data$crowd)

#AfD voters
sub.result["afd",] <- crowd.ci(data$crowd[data$afd==1])

#East Germany
sub.result["east",] <- crowd.ci(data$crowd[data$east==1])

#West Germany
sub.result["west",] <- crowd.ci(data$crowd[data$east==0])

#University degree (educ_job_16 codes 10 and 11)
data$university <- ifelse(data$educ_job_16%in%c(10, 11), 1, 0)
sub.result["high knowledge",] <- crowd.ci(data$crowd[data$university==1])

#all cases: median
# The point estimate has to be the median (the original code used mean() here,
# which merely repeated the "all" row); 1.253 times the SE of the mean is the
# large-sample SE of the median.
sub.result["median",] <- crowd.ci(data$crowd, center=median, se.scale=1.253)


# Convert the estimate matrix to a data.table and keep the subgroup name as a column
group.names <- row.names(sub.result)
sub.result <- as.data.table(sub.result)
sub.result$type <- group.names

# Vertical plot position per subgroup; anything not listed goes to the bottom row (1)
y.pos <- c("all"=6, "afd"=5, "east"=4, "west"=3, "high knowledge"=2)
row.y <- unname(y.pos[sub.result$type])
row.y[is.na(row.y)] <- 1
sub.result$y <- row.y
# Axis labels listed bottom-up, i.e. in the order of the positions 1 to 6
sub.result$lab <- factor(sub.result$y,
                         labels=c("Median Estimator", "Graduate Degree", "West Germans",
                                  "East Germans", "AfD Voters", "All Respondents"))

# Point estimates with CI segments; dashed reference line at 12.6
p <- ggplot(data=sub.result, aes(x=Estimate, group=type, y=lab)) +
    geom_vline(aes(xintercept=12.6), linetype=2) +
    geom_segment(aes(yend=lab, x=lowerCI, xend=upperCI)) +
    geom_point() +
    theme_bw() +
    theme(legend.title=element_blank(), text = element_text(size=16),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank()) +
    geom_text(aes(y=y+.125, label=round(Estimate, 1))) + # value printed just above each point
    xlab("Expected AfD Vote Share (in %)") +
    ylab("")
p

#############################
# LOESS over time, Figure 2 #
#############################
# Respondents with a valid crowd estimate: the data.table subset drops both the
# missing code -90 and NA values
data2 <- data.full[crowd != -90]

## #factors to convert number of observations into bar length
## summary(lm(c(7.5, 14)~c(2,345)))
## summary(lm(c(2,345)~c(7.5,14)))

#number of observations and mean crowd estimate per interview day
data2[, c("count", "mean") := list(.N, mean(crowd)), by=dDatum]
counts <- unique(data2[, list(dDatum, count, mean)])
# NOTE(review): the reference date is 2017-09-27 while the x axis is labelled
# "Days to Election" - confirm the intended date and the coding of dDatum.
counts$x <- as.numeric(as.Date("2017-09-27")-counts$dDatum)
# bar height: linear map sending 2 observations to y=7.5 and 345 to y=14 (see the lm() calls above)
counts$y <- counts$count*0.01895+7.46210
setkey(counts, dDatum)

# Daily observation counts (bars), daily means (points) and a LOESS smoother
p <- ggplot(data=data2, aes(x=as.numeric(as.Date("2017-09-27")-dDatum))) +
    geom_rect(aes(xmin=x-.5, xmax=x+.5, ymin=0, ymax=y), fill=NA, color="black", data=counts) +
    geom_smooth(aes(y=crowd), span=.2, method="loess", formula="y~x", colour="black",
                fullrange=TRUE) +
    geom_point(aes(x=x, y=mean), data=counts) +
    geom_hline(aes(yintercept=12.6), linetype=2) +
    theme_bw() +
    theme(legend.title=element_blank(), text = element_text(size=16),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank()) +
    xlab("Days to Election") +
    ylab("Expected AfD vote share in %") +
    # secondary axis: inverse of the bar-height map, so it reads as number of observations
    scale_y_continuous(sec.axis = sec_axis(~.*52.77-393.77, name="Number of observations")) +
    coord_cartesian(ylim=c(7.5, 14)) +
    scale_x_reverse()
p


#######################################
# Table 2: Question Type and Missings #
#######################################
#keep only respondents that see (not answer) all of our questions
data <- data.full[is.na(crowd)==FALSE]

# Number of respondents who were shown each question type
# (group 1: RRT, group 2: list A, group 3: list B)
n.shown <- nrow(data)
respondents <- c(n.shown,
                 n.shown,
                 nrow(data[group==1]),
                 nrow(data[group==2|group==3]),
                 nrow(data[group==2]),
                 nrow(data[group==3])
                 )
# Number of respondents with a negative code on (at least one item of) the question
missings <- c(nrow(data[crowd<0]),
              nrow(data[DQ<0]),
              nrow(data[group==1&RRT<0]),
              nrow(data[(group==2&(ICT1ctrl<0|ICT1trt<0))|(group==3&(ICT2ctrl<0|ICT2trt<0))]),
              nrow(data[group==2&(ICT1ctrl<0|ICT1trt<0)]),
              nrow(data[group==3&(ICT2ctrl<0|ICT2trt<0)])
              )
# Table 2 as a character matrix; percentages are rounded to one digit
tab2 <- cbind(question=c("Wisdom of Crowds", "Direct Question", "Crosswise-Model RRT",
                         "Double List Experiment", "List A", "List B"),
              respondents,
              pct.respondents=round(respondents/max(respondents)*100, digits=1),
              missings,
              pct.missing=round(missings/respondents*100, digits=1)
              )
tab2

##################################
# Bootstrapped Results, Figure 3 #
##################################

### Combined Results Matrix
result <- matrix(NA,ncol=3,nrow=4)
rownames(result) <- c("DQ","WoC","RRT","D-ICT")
colnames(result) <- c("lowerCI","Estimate","upperCI")

## 1. Direct Question: RESAMPLING subsample
data <- as.data.frame(data)
set.seed(834950)
# 1000 bootstrap replicates; each draws 865 rows with replacement and computes the
# AfD share (DQ code 11) among all answers outside the codes -99, -98, 1 and 2
# (presumably nonresponse / non-voter categories - confirm against the codebook).
# sum() is used instead of indexing table() output by position, which yields NA
# or the wrong cell whenever a resample lacks one of the categories.
boots <- vapply(seq_len(1000), function(i) {
    dat <- sample.int(nrow(data), 865, replace=TRUE) #sample rows randomly with replacement
    dq <- data$DQ[dat]
    sum(dq==11, na.rm=TRUE)/sum(!(dq%in%c(-99, -98, 1, 2)))
}, numeric(1))
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
# shares are reported in percent
result[1,] <- cbind(low*100,est*100,up*100)

# Bootstrap distribution of the mean crowd estimate: 1000 replicates, each the mean
# of `size` rows drawn with replacement; the code -90 is treated as missing.
# The seed is reset on every call, so both uses below start from the same RNG state.
resample.crowd <- function(size) {
    set.seed(133707)
    vapply(seq_len(1000), function(i) {
        drawn <- data$crowd[sample(1:nrow(data), size, TRUE)]
        mean(replace(drawn, which(drawn==-90), NA), na.rm=TRUE)
    }, numeric(1))
}

### 2a. Wisdom of Crowds: RESAMPLING subsample
boots <- resample.crowd(865)
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[2,] <- cbind(low,est,up)

### 2b. Wisdom of Crowds: RESAMPLING full sample size (for footnote)
# est/low/up are only computed here, they are not stored in `result`
boots <- resample.crowd(nrow(data))
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)

### 3. Crosswise RRT: RESAMPLING
# Row positions of the respondents in group 1 (the RRT group). which() returns
# plain positions and skips rows with missing group; the previous detour via
# as.numeric(rownames(...)) depended on the row names and was redone 1000 times.
rrt.rows <- which(data$group==1)
set.seed(133707)
boots <- vapply(seq_len(1000), function(i) {
    # draw 865 of these rows with replacement
    drawn <- rrt.rows[sample.int(length(rrt.rows), 865, replace=TRUE)]
    answers <- data$RRT[drawn]
    answers <- answers[!is.na(answers)&answers>0] # keep non-missing, positive answer codes
    n <- length(answers) # number of responses
    r <- sum(answers==1) # Number of "Yes" answers
    # crosswise-model estimator with probability .7
    ((r/n)+.7-1)/(2*.7-1)
}, numeric(1))
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
# shares are reported in percent
result[3,] <- cbind(low*100,est*100,up*100)

### 4. Double List Experiment: RESAMPLING
# Two-column matrix (control item, treatment item) restricted to the rows where
# both answers are present and non-negative
valid.pairs <- function(ctrl, trt) {
    pair <- cbind(ctrl, trt, deparse.level=0)
    pair[which(pair<0)] <- NA
    pair[complete.cases(pair),]
}
A <- valid.pairs(data$ICT1ctrl, data$ICT1trt)
B <- valid.pairs(data$ICT2ctrl, data$ICT2trt)

# Point estimate: average of the treatment-minus-control mean differences of both lists
est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

# Bootstrap statistic for boot(): for the rows in `indices`, the difference
# "mean of column 2 minus mean of column 1" is taken for columns 1:2 (list A)
# and columns 3:4 (list B) of `dat`; the two differences are averaged.
double.list <- function(dat, indices) {
    list.diff <- function(pair) mean(pair[,2])-mean(pair[,1])
    diff.A <- list.diff(dat[indices,1:2])
    diff.B <- list.diff(dat[indices,3:4])
    (diff.A+diff.B)/2
}

# boot() resamples rows of one matrix, so lists A and B are placed side by side.
# Both are cut to the length of the shorter list; the previous A[1:nrow(B),]
# failed with "subscript out of bounds" whenever B had more rows than A.
n.pairs <- min(nrow(A), nrow(B))
booted <- boot(cbind(A[seq_len(n.pairs),],B[seq_len(n.pairs),]), double.list, R=1000)
low <- quantile(booted$t, probs=.025)
up <- quantile(booted$t, probs=.975)
# results
cbind(low,est,up)
# shares are reported in percent
result[4,] <- cbind(low*100,est*100,up*100)

# Figure 3 data: keep the estimates numeric and carry the method name as a column.
# (Binding the row names onto the matrix would turn every number into text that
# has to be parsed back, which silently rounds the values.)
result <- as.data.table(result, keep.rownames=TRUE)
setnames(result, "rn", "type")
# vertical plot position per method
y.pos <- c("D-ICT"=1, "RRT"=2, "DQ"=3, "WoC"=4)
result$y <- unname(y.pos[result$type])
# labels are given in the order of the positions 1 to 4
result$lab <- factor(result$y, labels=c("Double List Experiment", "Crosswise-Model RRT",
                                   "Direct Question", "Wisdom of Crowds"))
# Point estimates with bootstrap CI segments; dashed reference line at 12.6
p <- ggplot(data=result, aes(x=Estimate, group=type, y=lab))
p <- p + geom_vline(aes(xintercept=12.6), linetype=2)
p <- p + geom_segment(aes(yend=lab, x=lowerCI, xend=upperCI))
p <- p + geom_point()
p <- p + theme_bw()
p <- p + theme(legend.title=element_blank(), text = element_text(size=16),
               panel.grid.major.x = element_blank(),
               panel.grid.major.y = element_blank(),
               panel.grid.minor.x = element_blank(),
               panel.grid.minor.y = element_blank())
p <- p + geom_text(aes(y=y+.08, label=round(Estimate, 1))) # value printed just above each point
## p <- p + scale_x_continuous(breaks=c(5, 10, 12.6, 15, 20, 25, 30),
##                             labels=c("5", "10", "12.6", "15", "20", "25", "30"),
##                             expand=expand_scale(add=c(3,3)))
p <- p + xlab("Expected AfD Vote Share (in %)")
p <- p + ylab("")
p

#######################################
# Seconds on Question Page, Figure A1 #
#######################################

# Rescale the page timers by 1/1000 (presumably milliseconds to seconds)
for (timer in c("dDQ", "dRRT", "dICT1ctrl", "dICT1trt", "dICT2ctrl", "dICT2trt", "dcrowd")) {
    data[[timer]] <- data[[timer]]/1000
}

### Combined Results Matrix
# 4 methods x (lower CI, estimate, upper CI) x 61 time thresholds
presult <- array(NA, dim=c(4,3,61),
                 dimnames=list(c("DQ","WoC","RRT", "D-ICT"),
                               c("lowerCI","Estimate","upperCI"),
                               NULL))


data <- as.data.frame(data)
# Bootstrap statistic for the double list experiment: average of the
# "column 2 minus column 1" mean differences of list A (columns 1:2) and
# list B (columns 3:4) over the resampled rows.
# Defined once here instead of on every pass through the loop.
double.list <- function(dat, indices) {
    A <- dat[indices,1:2]
    B <- dat[indices,3:4]
    return(((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2)
}

# Draw `size` row positions with replacement from `pool`. For pools with more
# than one element this consumes the random numbers exactly like
# sample(pool, size, TRUE), but a single remaining position k is never
# reinterpreted as the range 1:k.
draw.rows <- function(pool, size) {
    pool[sample.int(length(pool), size, replace=TRUE)]
}

# Repeat the Figure 3 estimates for respondents who stayed at least `min.sec`
# (1, 2, ..., 61) time units on the respective question page.
for (min.sec in seq_len(dim(presult)[3])) {
    print(min.sec) # progress

    ## 1. Direct Question: RESAMPLING
    # which() gives plain row positions and skips rows with a missing timer
    dq.rows <- which(data$dDQ>=min.sec)
    set.seed(834950)
    boots <- vapply(seq_len(1000), function(i) {
        dq <- data$DQ[draw.rows(dq.rows, 865)]
        # AfD share (code 11) among answers outside the codes -99, -98, 1, 2;
        # sum() stays correct when a resample lacks one of the categories
        sum(dq==11, na.rm=TRUE)/sum(!(dq%in%c(-99, -98, 1, 2)))
    }, numeric(1))
    est <- mean(boots)
    low <- quantile(boots, .025)
    up <- quantile(boots, .975)
    presult[1,,min.sec] <- cbind(low*100,est*100,up*100)

    ### 2. Wisdom of Crowds: RESAMPLING
    woc.rows <- which(data$dcrowd>=min.sec)
    set.seed(133707)
    boots <- vapply(seq_len(1000), function(i) {
        crowd <- data$crowd[draw.rows(woc.rows, 865)]
        mean(replace(crowd, which(crowd==-90), NA), na.rm=TRUE) # -90 = missing
    }, numeric(1))
    est <- mean(boots)
    low <- quantile(boots, .025)
    up <- quantile(boots, .975)
    presult[2,,min.sec] <- cbind(low,est,up)

    ### 3. Crosswise RRT: RESAMPLING
    rrt.rows <- which(data$group==1&data$dRRT>=min.sec)
    set.seed(133707)
    boots <- vapply(seq_len(1000), function(i) {
        answers <- data$RRT[draw.rows(rrt.rows, 865)]
        answers <- answers[!is.na(answers)&answers>0]
        n <- length(answers) # number of responses
        r <- sum(answers==1) # Number of "Yes" answers
        ((r/n)+.7-1)/(2*.7-1)
    }, numeric(1))
    est <- mean(boots)
    low <- quantile(boots, .025)
    up <- quantile(boots, .975)
    presult[3,,min.sec] <- cbind(low*100,est*100,up*100)

    ### 4. Double List Experiment
    # respondents who stayed long enough on both items of a list
    rows.A <- which(data$dICT1ctrl>=min.sec&data$dICT1trt>=min.sec)
    rows.B <- which(data$dICT2ctrl>=min.sec&data$dICT2trt>=min.sec)
    A <- cbind(data$ICT1ctrl[rows.A], data$ICT1trt[rows.A])
    B <- cbind(data$ICT2ctrl[rows.B], data$ICT2trt[rows.B])
    # negative codes count as missing; keep complete pairs only
    A[which(A<0)] <- NA
    B[which(B<0)] <- NA
    A <- A[complete.cases(A),,drop=FALSE]
    B <- B[complete.cases(B),,drop=FALSE]

    est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

    # boot() resamples rows of one matrix: cut both lists to the common length
    mini <- min(nrow(A), nrow(B))
    booted <- boot(cbind(A[seq_len(mini),,drop=FALSE],B[seq_len(mini),,drop=FALSE]),
                   double.list, R=1000)
    low <- quantile(booted$t, probs=.025)
    up <- quantile(booted$t, probs=.975)
    presult[4,,min.sec] <- cbind(low*100,est*100,up*100)
}

# Stack the result slices into one long matrix; the extra column is the slice index
slices <- lapply(seq_len(dim(presult)[3]), function(i) cbind(presult[,,i], i))
pdata <- do.call(rbind, slices)
pdata <- cbind(pdata, rownames(pdata))
colnames(pdata) <- c("lowerCI", "Estimate", "upperCI", "t", "type")
pdata <- as.data.table(pdata)
# binding the row names turned all columns into character: convert the numbers back
for (num.col in c("Estimate", "t", "lowerCI", "upperCI")) {
    set(pdata, j=num.col, value=as.numeric(pdata[[num.col]]))
}
# legend order per method; anything not listed gets position 1
y.pos <- c("DQ"=4, "D-ICT"=3, "RRT"=2)
row.y <- unname(y.pos[pdata$type])
row.y[is.na(row.y)] <- 1
pdata$y <- row.y

# labels are given in the order of the positions 1 to 4
pdata$lab <- factor(pdata$y, labels=c("Wisdom of Crowds", "Crosswise-Model RRT",
                                 "Double List Experiment",
                                  "Direct Question"))
# One line per method: estimate as a function of the minimal response time
p <- ggplot(data=pdata[!(type %in% c("ICT1", "ICT2"))], aes(x=t, group=type)) +
    geom_line(aes(y=Estimate, linetype=as.factor(lab))) +
## p <- p + geom_ribbon(aes(ymin=lowerCI,
##                                 ymax=upperCI, fill=as.factor(type)))
    theme_bw() +
    theme(legend.title=element_blank(), text = element_text(size=16),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank()) +
    geom_hline(aes(yintercept=12.6), linetype=2) +
    theme(legend.key.height=unit(1.5,"line")) +
    xlab("Minimal Response Time (in Seconds)") +
    ylab("Expected AfD Vote Share (in %)")
p


##############################
# AFD VOTERS ONLY, Figure A2 #
##############################
# Restrict to respondents who name the AfD (code 11) in the direct question;
# the data.table subset also drops rows with missing DQ
data <- as.data.frame(as.data.table(data)[DQ==11])


### Combined Results Matrix
result <- matrix(NA, nrow=3, ncol=3,
                 dimnames=list(c("WoC","RRT","D-ICT"),
                               c("lowerCI","Estimate","upperCI")))


### 2. Wisdom of Crowds: RESAMPLING
# 1000 replicates, each the mean crowd estimate of 178 rows drawn with replacement
set.seed(133707)
boots <- vapply(seq_len(1000), function(i) {
    drawn <- data$crowd[sample(1:nrow(data), 178, TRUE)]
    mean(replace(drawn, which(drawn==-90), NA), na.rm=TRUE) # -90 = missing
}, numeric(1))
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[1,] <- cbind(low,est,up)


### 3. Crosswise RRT: RESAMPLING
# Row positions of the respondents in group 1 (the RRT group). which() returns
# plain positions and skips rows with missing group; the previous detour via
# as.numeric(rownames(...)) depended on the row names and was redone 1000 times.
rrt.rows <- which(data$group==1)
set.seed(133707)
boots <- vapply(seq_len(1000), function(i) {
    # draw 178 of these rows with replacement
    drawn <- rrt.rows[sample.int(length(rrt.rows), 178, replace=TRUE)]
    answers <- data$RRT[drawn]
    answers <- answers[!is.na(answers)&answers>0] # keep non-missing, positive answer codes
    n <- length(answers) # number of responses
    r <- sum(answers==1) # Number of "Yes" answers
    # crosswise-model estimator with probability .7
    ((r/n)+.7-1)/(2*.7-1)
}, numeric(1))
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
# shares are reported in percent
result[2,] <- cbind(low*100,est*100,up*100)

#my double ict
# Two-column matrix (control item, treatment item) restricted to the rows where
# both answers are present and non-negative
valid.pairs <- function(ctrl, trt) {
    pair <- cbind(ctrl, trt, deparse.level=0)
    pair[which(pair<0)] <- NA
    pair[complete.cases(pair),]
}
A <- valid.pairs(data$ICT1ctrl, data$ICT1trt)
B <- valid.pairs(data$ICT2ctrl, data$ICT2trt)

# Point estimate: average of the treatment-minus-control mean differences of both lists
est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

# Bootstrap statistic for boot(): for the rows in `indices`, the difference
# "mean of column 2 minus mean of column 1" is taken for columns 1:2 (list A)
# and columns 3:4 (list B) of `dat`; the two differences are averaged.
double.list <- function(dat, indices) {
    list.diff <- function(pair) mean(pair[,2])-mean(pair[,1])
    diff.A <- list.diff(dat[indices,1:2])
    diff.B <- list.diff(dat[indices,3:4])
    (diff.A+diff.B)/2
}
# boot() resamples rows of one matrix, so lists A and B are placed side by side.
# Both are cut to the length of the shorter list; the previous B[1:nrow(A),]
# failed with "subscript out of bounds" whenever B had fewer rows than A.
n.pairs <- min(nrow(A), nrow(B))
booted <- boot(cbind(A[seq_len(n.pairs),],B[seq_len(n.pairs),]), double.list, R=1000)
low <- quantile(booted$t, probs=.025)
up <- quantile(booted$t, probs=.975)
# results
cbind(low,est,up)
# shares are reported in percent
result[3,] <- cbind(low*100,est*100,up*100)

# Attach the method name as column "type"; this turns the matrix into character
result <- as.data.table(cbind(result, type=rownames(result)))
result <- result[!(type %in% c("ICT1", "ICT2"))]
# convert the numbers back from character
for (num.col in c("Estimate", "lowerCI", "upperCI")) {
    set(result, j=num.col, value=as.numeric(result[[num.col]]))
}
# vertical position per method; anything not listed gets position 1
y.pos <- c("DQ"=4, "D-ICT"=3, "RRT"=2)
row.y <- unname(y.pos[result$type])
row.y[is.na(row.y)] <- 1
result$y <- row.y
# labels are given in the order of the positions 1 to 3
result$lab <- factor(result$y, labels=c("Weisheit\nder Vielen", "Crosswise-Model RRT",
                                   "Double List Experiment"))
# Only RRT and the double list experiment are drawn; dashed reference line at 100
p <- ggplot(data=result[type!="WoC"], aes(x=Estimate, group=type, y=lab)) +
    geom_vline(aes(xintercept=100), linetype=2) +
    geom_segment(aes(yend=lab, x=lowerCI, xend=upperCI)) +
    geom_point() +
    theme_bw() +
    theme(legend.title=element_blank(), text = element_text(size=16),
          panel.grid.major.x = element_blank(),
          panel.grid.major.y = element_blank(),
          panel.grid.minor.x = element_blank(),
          panel.grid.minor.y = element_blank()) +
    geom_text(aes(y=y-.95, label=round(Estimate, 1))) +
    xlab("Expected AfD Vote Share Among AfD Voters (in %)") +
    ylab("")
p

##############################################################################
# Switching to and from AfD up to election, Main text shortly below Figure 3 #
##############################################################################

#################################################################################################
# Here we, further, use the following two datasets:                                             #
#                                                                                               #
# Blom, Annelies G.; Felderer, Barbara; Höhne, Jan K.; Krieger, Ulrich; Rettig, Tobias;         #
# SFB 884 "Political Economy of Reforms", Universität Mannheim (2018):                          #
# German Internet Panel, Welle 31 - Core Study (September 2017). GESIS Datenarchiv, Köln.       #
# ZA6905 Datenfile Version 1.0.0, doi:10.4232/1.13011                                           #
#                                                                                               #
# Internet: http://dx.doi.org/10.4232/1.13011                                                   #
# Gesis Study Number : ZA6905                                                                   #
#                                                                                               #
#                                                                                               #
# Blom, Annelies G.; Felderer, Barbara; Höhne, Jan Karem; Krieger, Ulrich; Rettig, Tobias;      #
# SFB 884 "Political Economy of Reforms", Universität Mannheim (2018):                          #
# German Internet Panel, Welle 32 (November 2017). GESIS Datenarchiv, Köln.                     #
# ZA6906 Datenfile Version 1.0.0, doi:10.4232/1.13043                                           #
#                                                                                               #
# Internet: http://dx.doi.org/10.4232/1.13043                                                   #
# Gesis Study Number : ZA6906                                                                   #
#################################################################################################

data <- data.full

# Wave 31: AfD indicator from AA31471 or, where that is missing, AA31472
gip31 <- as.data.table(read.dta13("ZA6905_v1-0-0.dta", convert.factors=FALSE))
gip31 <- gip31[, list(id_g, AA31471, AA31472)]
#assign vote intention if not voted yet
gip31[, vote := ifelse(is.na(AA31471), AA31472, AA31471)]
# 1 = AfD (code 11), 0 = anything else, NA where no answer is available
gip31[, afd31 := as.numeric(vote==11)]
gip31 <- gip31[, list(id_g, afd31)]

# Wave 32: AfD indicator from AA32038, coded the same way
gip32 <- as.data.table(read.dta13("ZA6906_v1-0-0.dta", convert.factors=FALSE))
gip32 <- gip32[, list(id_g, afd32=as.numeric(AA32038==11))]

# Wave 30 vs. wave 31: cross-tabulate the AfD indicators
data <- merge(data, gip31, all.x=TRUE, by.x="id", by.y="id_g")
switch31 <- table(data$afd, data$afd31)
switch31 #about 5 percent of respondents switch from or to AfD
# share of switchers = off-diagonal cells / all cross-classified respondents
# (original run: (51+41)/(1787+41+51+103))
(sum(switch31)-sum(diag(switch31)))/sum(switch31)

# Wave 30 vs. wave 32
data <- merge(data, gip32, all.x=TRUE, by.x="id", by.y="id_g")
switch32 <- table(data$afd, data$afd32)
switch32 #again, about 5 percent of respondents switch from or to AfD
# (original run: (64+54)/(2147+64+54+112))
(sum(switch32)-sum(diag(switch32)))/sum(switch32)

#these show that the overall effect of switchers is small on AfD vote share
# (shares are relative to all rows, including respondents without an answer)
round(table(data$afd)/nrow(data), digits=2)
round(table(data$afd31)/nrow(data), digits=2)
round(table(data$afd32)/nrow(data), digits=2)



############################################################
## sessionInfo() of the machine this script was tested on: #
############################################################

## R version 3.5.1 (2018-07-02)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 7 x64 (build 7601) Service Pack 1

## Matrix products: default

## locale:
## [1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
## [3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Germany.1252    

## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     

## other attached packages:
## [1] readstata13_0.9.2 boot_1.3-20       ggplot2_3.0.0     data.table_1.11.8

## loaded via a namespace (and not attached):
##  [1] Rcpp_0.12.19     withr_2.1.2      crayon_1.3.4     dplyr_0.7.6     
##  [5] assertthat_0.2.0 grid_3.5.1       plyr_1.8.4       R6_2.2.2        
##  [9] gtable_0.2.0     magrittr_1.5     scales_1.0.0     pillar_1.3.0    
## [13] rlang_0.2.2      lazyeval_0.2.1   bindrcpp_0.2.2   tools_3.5.1     
## [17] glue_1.3.0       purrr_0.2.5      munsell_0.5.0    compiler_3.5.1  
## [21] pkgconfig_2.0.2  colorspace_1.3-2 tidyselect_0.2.4 bindr_0.1.1     
## [25] tibble_1.4.2    
